#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 全网热点聚焦爬取.py
# @Author: dongguangwen
# @Date  : 2024-07-06 13:33

import requests
from lxml import etree


# Page that is scraped; it is expected to contain one ranking panel per platform.
url = 'https://www.5guanjianci.com/hot'
# Browser-like User-Agent sent with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
}

# Seconds to wait for the server. requests applies no timeout unless one is
# passed explicitly, so without this a stalled connection would hang forever.
REQUEST_TIMEOUT = 10

# XPath of the <li> rows inside the panel at 1-based position {index}.
PANEL_ROWS_XPATH = '//*[@id="__layout"]/div/div[1]/div[2]/div[3]/div[{index}]/div/div[2]/ul/li'

# (panel position, whether the span[2] text column is also extracted),
# listed in the order the panels are printed.
PANELS = (
    (1, True),   # Baidu hot search terms
    (2, False),  # Baidu hot search
    (3, False),  # Toutiao hot search
    (4, False),  # Douyin hot search
    (5, False),  # Weibo hot search
    (6, False),  # 360 Search hot search
)


def build_panel_xpaths(index, with_nums=False):
    """Return the XPath queries for one panel, in output-column order.

    Args:
        index: 1-based position of the panel inside the panel container.
        with_nums: When true, append the query for the ``span[2]`` text of
            each row (presumably a heat/search count -- confirm against the
            live page).

    Returns:
        A tuple ``(title_xpath, href_xpath)`` or, with ``with_nums``,
        ``(title_xpath, href_xpath, nums_xpath)``.
    """
    rows = PANEL_ROWS_XPATH.format(index=index)
    queries = [rows + '/p/a/text()', rows + '/p/a/@href']
    if with_nums:
        queries.append(rows + '/span[2]/text()')
    return tuple(queries)


def extract_panel(html, index, with_nums=False):
    """Extract the rows of one panel from a parsed document.

    Args:
        html: Parsed document exposing ``xpath(query)`` (an lxml element).
        index: 1-based position of the panel, see ``build_panel_xpaths``.
        with_nums: Whether to include the ``span[2]`` text column.

    Returns:
        A list of tuples, one per row: ``(title, href)`` or
        ``(title, href, nums)``.
    """
    columns = [html.xpath(query) for query in build_panel_xpaths(index, with_nums)]
    # zip stops at the shortest column, so a row missing one of the fields
    # shortens the result instead of raising.
    return list(zip(*columns))


def fetch_page():
    """Download ``url`` and return it parsed with ``etree.HTML``.

    Raises:
        requests.RequestException: On connection failure, on timeout, or
            (via ``raise_for_status``) on a 4xx/5xx response.
    """
    resp = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error response instead of parsing an error page and
    # silently printing nothing.
    resp.raise_for_status()
    return etree.HTML(resp.text)


def main():
    """Fetch the page once and print every row of every configured panel."""
    html = fetch_page()
    for index, with_nums in PANELS:
        for content in extract_panel(html, index, with_nums):
            print(content)


if __name__ == '__main__':
    main()
